In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from wordcloud import WordCloud
import squarify
In [2]:
df_inc = pd.read_csv("E:/Python/Project_Python/INCIDENT.csv")
df_inc.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2061 entries, 0 to 2060
Data columns (total 30 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Incident_ID         2061 non-null   object
 1   Sources             2061 non-null   object
 2   Number_News         686 non-null    object
 3   Media_Attention     725 non-null    object
 4   Reliability         2061 non-null   int64 
 5   Date                2061 non-null   object
 6   Quarter             2050 non-null   object
 7   School              2060 non-null   object
 8   City                2061 non-null   object
 9   State               2061 non-null   object
 10  School_Level        2037 non-null   object
 11  Location            2058 non-null   object
 12  Location_Type       2056 non-null   object
 13  During_School       2039 non-null   object
 14  Time_Period         1897 non-null   object
 15  First_Shot          1586 non-null   object
 16  Summary             2061 non-null   object
 17  Narrative           2046 non-null   object
 18  Situation           1901 non-null   object
 19  Targets             1757 non-null   object
 20  Accomplice          1765 non-null   object
 21  Hostages            2048 non-null   object
 22  Barricade           2048 non-null   object
 23  Officer_Involved    2054 non-null   object
 24  Bullied             1753 non-null   object
 25  Domestic_Violence   1906 non-null   object
 26  Gang_Related        1644 non-null   object
 27  Preplanned          1972 non-null   object
 28  Shots_Fired         1402 non-null   object
 29  Active_Shooter_FBI  1088 non-null   object
dtypes: int64(1), object(29)
memory usage: 483.2+ KB
In [3]:
df_inc.isnull().sum()
Out[3]:
Incident_ID              0
Sources                  0
Number_News           1375
Media_Attention       1336
Reliability              0
Date                     0
Quarter                 11
School                   1
City                     0
State                    0
School_Level            24
Location                 3
Location_Type            5
During_School           22
Time_Period            164
First_Shot             475
Summary                  0
Narrative               15
Situation              160
Targets                304
Accomplice             296
Hostages                13
Barricade               13
Officer_Involved         7
Bullied                308
Domestic_Violence      155
Gang_Related           417
Preplanned              89
Shots_Fired            659
Active_Shooter_FBI     973
dtype: int64
In [4]:
df_inct= df_inc.drop_duplicates(keep = 'first')
del_inc=['Number_News', 'Sources', 'Media_Attention', 'School']
df_incident=df_inct.drop(del_inc, axis=1)
df_incident.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2061 entries, 0 to 2060
Data columns (total 26 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Incident_ID         2061 non-null   object
 1   Reliability         2061 non-null   int64 
 2   Date                2061 non-null   object
 3   Quarter             2050 non-null   object
 4   City                2061 non-null   object
 5   State               2061 non-null   object
 6   School_Level        2037 non-null   object
 7   Location            2058 non-null   object
 8   Location_Type       2056 non-null   object
 9   During_School       2039 non-null   object
 10  Time_Period         1897 non-null   object
 11  First_Shot          1586 non-null   object
 12  Summary             2061 non-null   object
 13  Narrative           2046 non-null   object
 14  Situation           1901 non-null   object
 15  Targets             1757 non-null   object
 16  Accomplice          1765 non-null   object
 17  Hostages            2048 non-null   object
 18  Barricade           2048 non-null   object
 19  Officer_Involved    2054 non-null   object
 20  Bullied             1753 non-null   object
 21  Domestic_Violence   1906 non-null   object
 22  Gang_Related        1644 non-null   object
 23  Preplanned          1972 non-null   object
 24  Shots_Fired         1402 non-null   object
 25  Active_Shooter_FBI  1088 non-null   object
dtypes: int64(1), object(25)
memory usage: 418.8+ KB
In [5]:
pd.set_option("display.max_columns", None)
df_incident.head()
Out[5]:
Incident_ID Reliability Date Quarter City State School_Level Location Location_Type During_School Time_Period First_Shot Summary Narrative Situation Targets Accomplice Hostages Barricade Officer_Involved Bullied Domestic_Violence Gang_Related Preplanned Shots_Fired Active_Shooter_FBI
0 20220601CAULL 4 01-06-2022 Summer Los Angeles CA High Front of School Outside on School Property Yes Dismissal 15:25:00 Student shot in front of school at dismissal 16-year-olds student was shot in front of the ... Escalation of Dispute Victims Targeted No No No No No No Yes No NaN No
1 20220531LAMON 4 31-05-2022 Spring New Orleans LA High Off School Property Off School Property Yes Sport Event 11:45:00 3 people shot following high school graduation An 80-year-old woman was killed and two men we... Escalation of Dispute Both No No No No No No No No 99 No
2 20220530CAHEL 4 30-05-2022 Spring La Mesa CA High Parking Lot Outside on School Property No Evening 18:20:00 Adult man shot in school parking lot Adult man was shot in the school parking lot. ... NaN Victims Targeted No No No No No No NaN No 99 No
3 20220529ILDAC 4 29-05-2022 Spring Chicago IL Elementary Front of School Outside on School Property No Night 01:30:00 5 wounded when 97 shots fired in front of school. 5 people were wounded when 97 shots were fired... Escalation of Dispute Both Yes No No No No No NaN No 97 No
4 20220526SCMEG 4 26-05-2022 Spring Goose Creek SC K-8 Parking Lot Outside on School Property No Night 00:05:00 Man fatally shot in parking lot, found by staf... 31-year-old male (off duty sheriffs deputy) wa... NaN Victims Targeted NaN No No No No NaN NaN No 99 No
In [6]:
df_sh = pd.read_csv("E:/Python/Project_Python/SHOOTER.csv")
df_shooter= df_sh.drop_duplicates(keep = 'first')
df_shooter.info()
<class 'pandas.core.frame.DataFrame'>
Index: 2171 entries, 0 to 2284
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   incidentid         2171 non-null   object
 1   age                1769 non-null   object
 2   gender             1831 non-null   object
 3   race               614 non-null    object
 4   schoolaffiliation  2042 non-null   object
 5   shooteroutcome     2166 non-null   object
 6   shooterdied        2164 non-null   object
 7   injury             327 non-null    object
 8   chargesfiled       660 non-null    object
 9   verdict            172 non-null    object
 10  minorchargedadult  230 non-null    object
 11  criminalhistory    180 non-null    object
dtypes: object(12)
memory usage: 220.5+ KB
In [7]:
df_shooter.head()
Out[7]:
incidentid age gender race schoolaffiliation shooteroutcome shooterdied injury chargesfiled verdict minorchargedadult criminalhistory
0 19700105DCHIW 15 Male NaN Student Unknown No NaN NaN NaN NaN NaN
1 19700105DCSOW NaN Male NaN Student Surrendered No NaN NaN NaN NaN NaN
2 19700105DCUNW NaN Male NaN Student Fled/Escaped No NaN NaN NaN NaN NaN
3 19700206OHJOC 18 Male Hispanic Student Unknown No NaN NaN 1-5 years NaN NaN
4 19700323CADAL 16 Male Hispanic Student Surrendered No NaN Assault with intent to commit murder NaN NaN NaN
In [8]:
df_shooter.isnull().sum()
Out[8]:
incidentid              0
age                   402
gender                340
race                 1557
schoolaffiliation     129
shooteroutcome          5
shooterdied             7
injury               1844
chargesfiled         1511
verdict              1999
minorchargedadult    1941
criminalhistory      1991
dtype: int64
In [9]:
del_sh=['injury', 'chargesfiled', 'verdict', 'minorchargedadult', 'criminalhistory', 'race']
df_shooter=df_sh.drop(del_sh, axis=1)
df_shooter.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2285 entries, 0 to 2284
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   incidentid         2285 non-null   object
 1   age                1860 non-null   object
 2   gender             1924 non-null   object
 3   schoolaffiliation  2145 non-null   object
 4   shooteroutcome     2280 non-null   object
 5   shooterdied        2276 non-null   object
dtypes: object(6)
memory usage: 107.2+ KB
In [10]:
df_vic = pd.read_csv("E:/Python/Project_Python/VICTIM.csv")
df_victim= df_vic.drop_duplicates(keep = 'first')
df_victim.info()
<class 'pandas.core.frame.DataFrame'>
Index: 2483 entries, 0 to 3092
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   incidentid         2483 non-null   object
 1   race               286 non-null    object
 2   injury             2014 non-null   object
 3   gender             2116 non-null   object
 4   schoolaffiliation  2195 non-null   object
 5   age                2231 non-null   object
dtypes: object(6)
memory usage: 135.8+ KB
In [11]:
df_victim.isnull().sum()
Out[11]:
incidentid              0
race                 2197
injury                469
gender                367
schoolaffiliation     288
age                   252
dtype: int64
In [12]:
del_vic=['race']
df_victim=df_vic.drop(del_vic, axis=1)
df_victim
Out[12]:
incidentid injury gender schoolaffiliation age
0 19700105DCHIW Fatal Male Student 15
1 19700105DCSOW Wounded Male Student Teen
2 19700105DCUNW NaN Male Unknown 16
3 19700206OHJOC Wounded Male Student 18
4 19700323CADAL Wounded Male Security Guard 44
... ... ... ... ... ...
3088 20220530CAHEL Wounded Male No Relation 20
3089 20220531LAMON Fatal Female Relative 80
3090 20220531LAMON Wounded Male Relative Adult
3091 20220531LAMON Wounded Male Relative Adult
3092 20220601CAULL Wounded Male Student 16

3093 rows × 5 columns

In [13]:
df_we = pd.read_csv("E:/Python/Project_Python/WEAPON.csv")
df_weapon= df_we.drop_duplicates(keep = 'first')
df_weapon.info()
<class 'pandas.core.frame.DataFrame'>
Index: 2050 entries, 0 to 2062
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   incidentid     2050 non-null   object
 1   weaponcaliber  1433 non-null   object
 2   weapondetails  85 non-null     object
 3   weapontype     1983 non-null   object
dtypes: object(4)
memory usage: 80.1+ KB
In [14]:
df_weapon.isnull().sum()
Out[14]:
incidentid          0
weaponcaliber     617
weapondetails    1965
weapontype         67
dtype: int64
In [15]:
del_we=['weapondetails']
df_weapon=df_weapon.drop(del_we, axis=1)
df_weapon.info()
<class 'pandas.core.frame.DataFrame'>
Index: 2050 entries, 0 to 2062
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   incidentid     2050 non-null   object
 1   weaponcaliber  1433 non-null   object
 2   weapontype     1983 non-null   object
dtypes: object(3)
memory usage: 64.1+ KB
In [16]:
df_inc_sh = pd.merge(df_incident, df_shooter, how= 'left', left_on='Incident_ID', right_on = 'incidentid')
df_inc_sh.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2294 entries, 0 to 2293
Data columns (total 32 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Incident_ID         2294 non-null   object
 1   Reliability         2294 non-null   int64 
 2   Date                2294 non-null   object
 3   Quarter             2282 non-null   object
 4   City                2294 non-null   object
 5   State               2294 non-null   object
 6   School_Level        2269 non-null   object
 7   Location            2291 non-null   object
 8   Location_Type       2289 non-null   object
 9   During_School       2268 non-null   object
 10  Time_Period         2117 non-null   object
 11  First_Shot          1766 non-null   object
 12  Summary             2294 non-null   object
 13  Narrative           2278 non-null   object
 14  Situation           2114 non-null   object
 15  Targets             1968 non-null   object
 16  Accomplice          1977 non-null   object
 17  Hostages            2279 non-null   object
 18  Barricade           2279 non-null   object
 19  Officer_Involved    2285 non-null   object
 20  Bullied             1974 non-null   object
 21  Domestic_Violence   2135 non-null   object
 22  Gang_Related        1827 non-null   object
 23  Preplanned          2200 non-null   object
 24  Shots_Fired         1534 non-null   object
 25  Active_Shooter_FBI  1221 non-null   object
 26  incidentid          2286 non-null   object
 27  age                 1861 non-null   object
 28  gender              1925 non-null   object
 29  schoolaffiliation   2146 non-null   object
 30  shooteroutcome      2281 non-null   object
 31  shooterdied         2277 non-null   object
dtypes: int64(1), object(31)
memory usage: 573.6+ KB
In [17]:
df_inc_sh_vic = pd.merge(df_inc_sh, df_victim, how= 'left', left_on='Incident_ID', right_on = 'incidentid')
df_inc_sh_vic.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3941 entries, 0 to 3940
Data columns (total 37 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Incident_ID          3941 non-null   object
 1   Reliability          3941 non-null   int64 
 2   Date                 3941 non-null   object
 3   Quarter              3926 non-null   object
 4   City                 3941 non-null   object
 5   State                3941 non-null   object
 6   School_Level         3911 non-null   object
 7   Location             3936 non-null   object
 8   Location_Type        3934 non-null   object
 9   During_School        3903 non-null   object
 10  Time_Period          3636 non-null   object
 11  First_Shot           3153 non-null   object
 12  Summary              3941 non-null   object
 13  Narrative            3906 non-null   object
 14  Situation            3717 non-null   object
 15  Targets              3462 non-null   object
 16  Accomplice           3431 non-null   object
 17  Hostages             3916 non-null   object
 18  Barricade            3915 non-null   object
 19  Officer_Involved     3928 non-null   object
 20  Bullied              3384 non-null   object
 21  Domestic_Violence    3704 non-null   object
 22  Gang_Related         3296 non-null   object
 23  Preplanned           3805 non-null   object
 24  Shots_Fired          2549 non-null   object
 25  Active_Shooter_FBI   2408 non-null   object
 26  incidentid_x         3930 non-null   object
 27  age_x                3290 non-null   object
 28  gender_x             3419 non-null   object
 29  schoolaffiliation_x  3680 non-null   object
 30  shooteroutcome       3923 non-null   object
 31  shooterdied          3909 non-null   object
 32  incidentid_y         3718 non-null   object
 33  injury               3212 non-null   object
 34  gender_y             2817 non-null   object
 35  schoolaffiliation_y  3352 non-null   object
 36  age_y                3398 non-null   object
dtypes: int64(1), object(36)
memory usage: 1.1+ MB
In [18]:
df_inc_sh_vic_we = pd.merge(df_inc_sh_vic, df_weapon, how= 'left', left_on='Incident_ID', right_on = 'incidentid')
df_inc_sh_vic_we.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4510 entries, 0 to 4509
Data columns (total 40 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Incident_ID          4510 non-null   object
 1   Reliability          4510 non-null   int64 
 2   Date                 4510 non-null   object
 3   Quarter              4495 non-null   object
 4   City                 4510 non-null   object
 5   State                4510 non-null   object
 6   School_Level         4480 non-null   object
 7   Location             4505 non-null   object
 8   Location_Type        4503 non-null   object
 9   During_School        4472 non-null   object
 10  Time_Period          4166 non-null   object
 11  First_Shot           3697 non-null   object
 12  Summary              4510 non-null   object
 13  Narrative            4471 non-null   object
 14  Situation            4276 non-null   object
 15  Targets              4024 non-null   object
 16  Accomplice           3992 non-null   object
 17  Hostages             4483 non-null   object
 18  Barricade            4484 non-null   object
 19  Officer_Involved     4497 non-null   object
 20  Bullied              3829 non-null   object
 21  Domestic_Violence    4269 non-null   object
 22  Gang_Related         3833 non-null   object
 23  Preplanned           4374 non-null   object
 24  Shots_Fired          2948 non-null   object
 25  Active_Shooter_FBI   2973 non-null   object
 26  incidentid_x         4499 non-null   object
 27  age_x                3837 non-null   object
 28  gender_x             3972 non-null   object
 29  schoolaffiliation_x  4226 non-null   object
 30  shooteroutcome       4492 non-null   object
 31  shooterdied          4478 non-null   object
 32  incidentid_y         4277 non-null   object
 33  injury               3767 non-null   object
 34  gender_y             3163 non-null   object
 35  schoolaffiliation_y  3888 non-null   object
 36  age_y                3954 non-null   object
 37  incidentid           4271 non-null   object
 38  weaponcaliber        3237 non-null   object
 39  weapontype           4176 non-null   object
dtypes: int64(1), object(39)
memory usage: 1.4+ MB
In [19]:
df_inc_sh_vic_we.head()
Out[19]:
Incident_ID Reliability Date Quarter City State School_Level Location Location_Type During_School Time_Period First_Shot Summary Narrative Situation Targets Accomplice Hostages Barricade Officer_Involved Bullied Domestic_Violence Gang_Related Preplanned Shots_Fired Active_Shooter_FBI incidentid_x age_x gender_x schoolaffiliation_x shooteroutcome shooterdied incidentid_y injury gender_y schoolaffiliation_y age_y incidentid weaponcaliber weapontype
0 20220601CAULL 4 01-06-2022 Summer Los Angeles CA High Front of School Outside on School Property Yes Dismissal 15:25:00 Student shot in front of school at dismissal 16-year-olds student was shot in front of the ... Escalation of Dispute Victims Targeted No No No No No No Yes No NaN No 20220601CAULL NaN Male NaN Fled/Escaped No 20220601CAULL Wounded Male Student 16 20220601CAULL NaN Handgun
1 20220531LAMON 4 31-05-2022 Spring New Orleans LA High Off School Property Off School Property Yes Sport Event 11:45:00 3 people shot following high school graduation An 80-year-old woman was killed and two men we... Escalation of Dispute Both No No No No No No No No 99 No 20220531LAMON NaN NaN NaN Fled/Escaped No 20220531LAMON Fatal Female Relative 80 20220531LAMON NaN Handgun
2 20220531LAMON 4 31-05-2022 Spring New Orleans LA High Off School Property Off School Property Yes Sport Event 11:45:00 3 people shot following high school graduation An 80-year-old woman was killed and two men we... Escalation of Dispute Both No No No No No No No No 99 No 20220531LAMON NaN NaN NaN Fled/Escaped No 20220531LAMON Wounded Male Relative Adult 20220531LAMON NaN Handgun
3 20220531LAMON 4 31-05-2022 Spring New Orleans LA High Off School Property Off School Property Yes Sport Event 11:45:00 3 people shot following high school graduation An 80-year-old woman was killed and two men we... Escalation of Dispute Both No No No No No No No No 99 No 20220531LAMON NaN NaN NaN Fled/Escaped No 20220531LAMON Wounded Male Relative Adult 20220531LAMON NaN Handgun
4 20220530CAHEL 4 30-05-2022 Spring La Mesa CA High Parking Lot Outside on School Property No Evening 18:20:00 Adult man shot in school parking lot Adult man was shot in the school parking lot. ... NaN Victims Targeted No No No No No No NaN No 99 No 20220530CAHEL Adult Male No Relation Fled/Escaped No 20220530CAHEL Wounded Male No Relation 20 20220530CAHEL NaN Handgun
In [20]:
df_final= df_inc_sh_vic_we.drop_duplicates(keep = 'first')
df_final.info()
<class 'pandas.core.frame.DataFrame'>
Index: 3127 entries, 0 to 4509
Data columns (total 40 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Incident_ID          3127 non-null   object
 1   Reliability          3127 non-null   int64 
 2   Date                 3127 non-null   object
 3   Quarter              3115 non-null   object
 4   City                 3127 non-null   object
 5   State                3127 non-null   object
 6   School_Level         3098 non-null   object
 7   Location             3122 non-null   object
 8   Location_Type        3120 non-null   object
 9   During_School        3101 non-null   object
 10  Time_Period          2907 non-null   object
 11  First_Shot           2504 non-null   object
 12  Summary              3127 non-null   object
 13  Narrative            3102 non-null   object
 14  Situation            2932 non-null   object
 15  Targets              2738 non-null   object
 16  Accomplice           2715 non-null   object
 17  Hostages             3105 non-null   object
 18  Barricade            3107 non-null   object
 19  Officer_Involved     3119 non-null   object
 20  Bullied              2683 non-null   object
 21  Domestic_Violence    2943 non-null   object
 22  Gang_Related         2588 non-null   object
 23  Preplanned           3022 non-null   object
 24  Shots_Fired          2030 non-null   object
 25  Active_Shooter_FBI   1892 non-null   object
 26  incidentid_x         3118 non-null   object
 27  age_x                2617 non-null   object
 28  gender_x             2716 non-null   object
 29  schoolaffiliation_x  2937 non-null   object
 30  shooteroutcome       3111 non-null   object
 31  shooterdied          3109 non-null   object
 32  incidentid_y         2907 non-null   object
 33  injury               2426 non-null   object
 34  gender_y             2459 non-null   object
 35  schoolaffiliation_y  2595 non-null   object
 36  age_y                2641 non-null   object
 37  incidentid           3064 non-null   object
 38  weaponcaliber        2227 non-null   object
 39  weapontype           2979 non-null   object
dtypes: int64(1), object(39)
memory usage: 1001.6+ KB
In [21]:
pd.set_option("display.max_columns", None)
df_final.head()
Out[21]:
Incident_ID Reliability Date Quarter City State School_Level Location Location_Type During_School Time_Period First_Shot Summary Narrative Situation Targets Accomplice Hostages Barricade Officer_Involved Bullied Domestic_Violence Gang_Related Preplanned Shots_Fired Active_Shooter_FBI incidentid_x age_x gender_x schoolaffiliation_x shooteroutcome shooterdied incidentid_y injury gender_y schoolaffiliation_y age_y incidentid weaponcaliber weapontype
0 20220601CAULL 4 01-06-2022 Summer Los Angeles CA High Front of School Outside on School Property Yes Dismissal 15:25:00 Student shot in front of school at dismissal 16-year-olds student was shot in front of the ... Escalation of Dispute Victims Targeted No No No No No No Yes No NaN No 20220601CAULL NaN Male NaN Fled/Escaped No 20220601CAULL Wounded Male Student 16 20220601CAULL NaN Handgun
1 20220531LAMON 4 31-05-2022 Spring New Orleans LA High Off School Property Off School Property Yes Sport Event 11:45:00 3 people shot following high school graduation An 80-year-old woman was killed and two men we... Escalation of Dispute Both No No No No No No No No 99 No 20220531LAMON NaN NaN NaN Fled/Escaped No 20220531LAMON Fatal Female Relative 80 20220531LAMON NaN Handgun
2 20220531LAMON 4 31-05-2022 Spring New Orleans LA High Off School Property Off School Property Yes Sport Event 11:45:00 3 people shot following high school graduation An 80-year-old woman was killed and two men we... Escalation of Dispute Both No No No No No No No No 99 No 20220531LAMON NaN NaN NaN Fled/Escaped No 20220531LAMON Wounded Male Relative Adult 20220531LAMON NaN Handgun
4 20220530CAHEL 4 30-05-2022 Spring La Mesa CA High Parking Lot Outside on School Property No Evening 18:20:00 Adult man shot in school parking lot Adult man was shot in the school parking lot. ... NaN Victims Targeted No No No No No No NaN No 99 No 20220530CAHEL Adult Male No Relation Fled/Escaped No 20220530CAHEL Wounded Male No Relation 20 20220530CAHEL NaN Handgun
5 20220529ILDAC 4 29-05-2022 Spring Chicago IL Elementary Front of School Outside on School Property No Night 01:30:00 5 wounded when 97 shots fired in front of school. 5 people were wounded when 97 shots were fired... Escalation of Dispute Both Yes No No No No No NaN No 97 No 20220529ILDAC NaN NaN NaN Fled/Escaped No 20220529ILDAC Wounded Female NaN 16 20220529ILDAC NaN Rifle
In [22]:
df1_inc_final=df_incident.groupby(['Quarter']).count()
sns.barplot(x='Quarter', y='Incident_ID', hue='Quarter', data=df1_inc_final, palette='viridis', dodge=False, legend=False)
plt.title('Incident occuring per Quarter')
plt.xlabel('Quarter')
plt.ylabel('Count of Incident')
plt.show()
No description has been provided for this image
In [23]:
red_fed = df_incident.groupby('Quarter').count()[['Shots_Fired']]
red_fed = red_fed.reset_index()
plt.figure(figsize=(8, 6))
wedges, texts, autotexts = plt.pie(red_fed['Shots_Fired'], labels=red_fed['Quarter'], autopct='%1.1f%%', startangle=90, colors=sns.color_palette('viridis', len(red_fed['Quarter'])))
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
plt.gca().add_artist(centre_circle)
plt.setp(autotexts, size=10, weight="bold")
plt.setp(texts, size=12)
plt.title('Shots Fired by Quarter', fontsize=14)
plt.show()
No description has been provided for this image
In [24]:
df2_inc_final=df_incident.groupby(['State']).count()
plt.figure(figsize=(12, 8))
sns.barplot(x='State', y='Incident_ID', hue='State', data=df2_inc_final, palette='viridis', dodge=False, legend=False)
plt.title('Incident Occurrences Per State', fontsize=16)
plt.xlabel('State', fontsize=14)
plt.ylabel('Count of Incidents', fontsize=14)
plt.xticks(rotation=90, fontsize=12) 
plt.yticks(fontsize=12)  
plt.tight_layout()
plt.show()
No description has been provided for this image
In [25]:
df3_inc_final = df_incident[['State', 'Active_Shooter_FBI']]
image = px.scatter_geo(df3_inc_final, locations='State', locationmode='USA-states', color='Active_Shooter_FBI',  hover_name='State', 
                       title='Active Shooter present in US States', color_discrete_map={'Yes': 'green', 'No': 'red'})
image.update_layout(geo=dict(scope='usa', showland=True, landcolor="rgb(240, 240, 240)"))
image.show()
In [26]:
df4_inc_final = df_incident['City'].value_counts().head(20).reset_index()
df4_inc_final.columns = ['City', 'Count']
heatmap_data =df4_inc_final.set_index('City')
plt.figure(figsize=(10, 8))
sns.heatmap(heatmap_data, annot=True, fmt='d', cmap='coolwarm', linewidths=0.5, cbar_kws={'label': 'Crime Count'})
plt.title('Top 20 Crime by City', fontsize=16)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [27]:
df5_inc_final = ' '.join(df_incident['City'])
wordcloud = WordCloud(background_color='white').generate(df5_inc_final)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
No description has been provided for this image
In [28]:
df7_inc_final = df_incident[['State', 'School_Level']]
fig = px.scatter_geo(df7_inc_final, locations="State", locationmode="USA-states", color="School_Level", hover_name="State", title="State wise crime at School Level",)
fig.update_layout(geo=dict(scope="usa", showland=True, landcolor="rgb(240, 240, 240)"))
fig.show()
In [29]:
df_incident['Shots_Fired'] = pd.to_numeric(df_incident['Shots_Fired'], errors='coerce')
shots_by_time = df_incident.groupby('Time_Period')['Shots_Fired'].sum().reset_index()
shots_by_time = shots_by_time.sort_values(by='Shots_Fired', ascending=False)
plt.figure(figsize=(10, 6))
sns.lineplot(x='Time_Period', y='Shots_Fired', data=shots_by_time, palette="orange")
plt.title('Shots Fired in Different Time Periods', fontsize=14)
plt.xlabel('Time Period', fontsize=12)
plt.ylabel('Total Shots Fired', fontsize=12)
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
C:\Users\Parag Chittora\AppData\Local\Temp\ipykernel_11632\3673799178.py:5: UserWarning:

Ignoring `palette` because no `hue` variable has been assigned.

No description has been provided for this image
In [30]:
numerical_data = df_incident.select_dtypes(include=['float64', 'int64'])
correlation_matrix = numerical_data.corr()
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="coolwarm", cbar=True)
plt.title('Correlation Heatmap', fontsize=16)
plt.show()
No description has been provided for this image
In [31]:
df1_sh_final = df_shooter.groupby('gender')['incidentid'].count()
df1_sh_final = df1_sh_final.reset_index()
plt.figure(figsize=(8, 8))
wedges, texts, autotexts = plt.pie(df1_sh_final['incidentid'], labels=df1_sh_final['gender'], autopct='%1.1f%%', startangle=90, colors=sns.color_palette('viridis', len(df1_sh_final['gender'])))
plt.setp(autotexts, size=10, weight="bold")
plt.setp(texts, size=12)
plt.title("Count of incidents Gender of shooter wise ", fontsize=16)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [32]:
df_shooter['shooteroutcome'] = df_shooter['shooteroutcome'].replace('Attempted Suicide', 'Suicide')
df_shooter['shooteroutcome'] = df_shooter['shooteroutcome'].replace('Unknown', 'Other')
df2_sh_final = df_shooter.groupby('shooteroutcome')['incidentid'].count()
df2_sh_final = df2_sh_final.reset_index()
plt.figure(figsize=(12, 8))
sns.barplot(x='shooteroutcome', y='incidentid', hue='shooteroutcome', data=df2_sh_final, palette='viridis', dodge=False, legend=False)
plt.title('Incident occuring per shooter outcome')
plt.xlabel('shooter outcome')
plt.xticks(rotation=90, fontsize=12) 
plt.ylabel('Count of Incident')
plt.show()
No description has been provided for this image
In [33]:
df4_sh_final = df_shooter.groupby('schoolaffiliation')['incidentid'].count()
df4_sh_final = df4_sh_final.reset_index()
plt.figure(figsize=(12, 8))
sns.barplot(x='incidentid', y='schoolaffiliation', hue='schoolaffiliation', data=df4_sh_final, palette='viridis', dodge=False, legend=False)
plt.title('School affiliation of shooter')
plt.xlabel('Count of Incident')
plt.xticks(rotation=90, fontsize=12) 
plt.ylabel('School affiliation')
plt.show()
No description has been provided for this image
In [34]:
df_shooter['shooteroutcome'] = df_shooter['shooteroutcome'].replace('Attempted Suicide', 'Suicide')
df_shooter['shooteroutcome'] = df_shooter['shooteroutcome'].replace('Unknown', 'Other')
df2_sh_final = df_shooter.groupby('shooteroutcome')['incidentid'].count()
df2_sh_final = df2_sh_final.reset_index()
plt.figure(figsize=(8, 6))
heatmap_data = df2_sh_final.set_index('shooteroutcome').T
sns.heatmap(heatmap_data, annot=True, fmt="d", cmap="viridis", cbar_kws={'label': 'Count of Incidents'})
plt.title('Heat Map: Incidents per Shooter Outcome', fontsize=14)
plt.xlabel('Shooter Outcome', fontsize=12)
plt.ylabel('Incident Count', fontsize=12)
plt.xticks(rotation=45, fontsize=10)
plt.yticks(fontsize=10)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [35]:
injury_counts = df_victim['injury'].value_counts(dropna=False).reset_index()
injury_counts.columns = ['Injury Type', 'Count']
injury_counts['Injury Type'] = injury_counts['Injury Type'].fillna('Unknown')
custom_colors = ['#FF9999', '#66B3FF', '#99FF99', '#FFCC99', '#C2C2F0']
plt.figure(figsize=(8, 6))
plt.pie(
    injury_counts['Count'], 
    labels=injury_counts['Injury Type'], 
    autopct='%1.1f%%', 
    startangle=90, 
    colors=custom_colors[:len(injury_counts)],
    textprops={'fontsize': 10}
)
plt.title('Distribution of Injury Types', fontsize=14)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [36]:
df_weapon['weapontype'] = df_weapon['weapontype'].replace('Mulitiple Handguns', 'Multiple Handguns')
df_weapon['weapontype'] = df_weapon['weapontype'].replace('Multiple Unknown', 'Unknown')
df_weapon['weapontype'] = df_weapon['weapontype'].replace('Rifle ', 'Rifle')
df_weapon['weapontype'] = df_weapon['weapontype'].replace('No Data', 'Other')
df_weapon['weapontype'] = df_weapon['weapontype'].replace('Unknown', 'Other')
In [37]:
gender_purchase = df_weapon.groupby('weapontype').count()[['incidentid']].reset_index()
plt.figure(figsize=(8, 6), dpi=100)
sns.barplot(data=gender_purchase, x='weapontype', y='incidentid', hue = 'weapontype', palette='viridis', dodge=False)
plt.xticks(rotation=90, fontsize=12)
Out[37]:
([0, 1, 2, 3, 4, 5],
 [Text(0, 0, 'Handgun'),
  Text(1, 0, 'Multiple Handguns'),
  Text(2, 0, 'Multiple Rifles'),
  Text(3, 0, 'Other'),
  Text(4, 0, 'Rifle'),
  Text(5, 0, 'Shotgun')])
No description has been provided for this image
In [38]:
df4 = df_final[['Incident_ID', 'Reliability']]
df4_remove_duplicate= df4.drop_duplicates(keep = 'first')
print(df4_remove_duplicate.info)
df4_final=df4_remove_duplicate.groupby(['Reliability']).count()
print(df4_final)
print("Null values", + df4_remove_duplicate['Reliability'].isnull().sum())
plt.figure(figsize=(12, 8))
sns.barplot(x='Reliability', y='Incident_ID', hue='Reliability', data=df4_final, palette='viridis', dodge=False, legend=False)
plt.title('Reliability of Incident Occurred ', fontsize=16)
plt.xlabel('Reliability', fontsize=14)
plt.ylabel('Count of Incidents', fontsize=14) 
plt.tight_layout()
plt.show()
<bound method DataFrame.info of         Incident_ID  Reliability
0     20220601CAULL            4
1     20220531LAMON            4
4     20220530CAHEL            4
5     20220529ILDAC            4
35    20220526SCMEG            4
...             ...          ...
4502  19700323CADAL            2
4506  19700206OHJOC            2
4507  19700105DCUNW            2
4508  19700105DCSOW            3
4509  19700105DCHIW            3

[2060 rows x 2 columns]>
             Incident_ID
Reliability             
1                    107
2                    937
3                    424
4                    527
5                     65
Null values 0
No description has been provided for this image
In [39]:
df5 = df_final[['Incident_ID', 'Date']]
df5_remove_duplicate= df5.drop_duplicates(keep = 'first')
df5_remove_duplicate.head()
Out[39]:
Incident_ID Date
0 20220601CAULL 01-06-2022
1 20220531LAMON 31-05-2022
4 20220530CAHEL 30-05-2022
5 20220529ILDAC 29-05-2022
35 20220526SCMEG 26-05-2022
In [40]:
df5_remove_duplicate = df5_remove_duplicate.copy()
df5_remove_duplicate['Date'] = pd.to_datetime(df5_remove_duplicate['Date'], format="%d-%m-%Y")
df5_remove_duplicate['Month'] = df5_remove_duplicate['Date'].dt.month
df5_remove_duplicate['Year'] = df5_remove_duplicate['Date'].dt.year
In [41]:
df_year=df5_remove_duplicate.groupby(['Year']).count()
df_year
Out[41]:
Incident_ID Date Month
Year
1970 20 20 20
1971 21 21 21
1972 18 18 18
1973 18 18 18
1974 16 16 16
1975 14 14 14
1976 11 11 11
1977 16 16 16
1978 16 16 16
1979 16 16 16
1980 20 20 20
1981 17 17 17
1982 18 18 18
1983 25 25 25
1984 25 25 25
1985 20 20 20
1986 16 16 16
1987 25 25 25
1988 38 38 38
1989 19 19 19
1990 18 18 18
1991 32 32 32
1992 33 33 33
1993 47 47 47
1994 39 39 39
1995 22 22 22
1996 21 21 21
1997 26 26 26
1998 28 28 28
1999 22 22 22
2000 29 29 29
2001 25 25 25
2002 19 19 19
2003 32 32 32
2004 35 35 35
2005 47 47 47
2006 59 59 59
2007 44 44 44
2008 35 35 35
2009 31 31 31
2010 15 15 15
2011 16 16 16
2012 20 20 20
2013 34 34 34
2014 46 46 46
2015 40 40 40
2016 50 50 50
2017 58 58 58
2018 118 118 118
2019 119 119 119
2020 114 114 114
2021 250 250 250
2022 147 147 147
In [42]:
df_year['Year'] = pd.to_datetime(df_year['Date'], dayfirst=True, errors='coerce').dt.year
year_counts = df5_remove_duplicate['Year'].value_counts().sort_index()
sns.set_theme(style="whitegrid")
plt.figure(figsize=(12, 6))
sns.lineplot(x=year_counts.index, y=year_counts.values, linewidth=2.5, color='b')
plt.title("School Shootings Over the Years", fontsize=18)
plt.xlabel("Year", fontsize=14)
plt.ylabel("Number of Incidents", fontsize=14)
plt.xticks(rotation=45)
plt.grid(visible=True, linestyle='-', alpha=0.7)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [43]:
df4 = df_final[['Incident_ID', 'gender_y']]
df4_remove_duplicate= df4.drop_duplicates(keep = 'first')
df4_final=df4_remove_duplicate.groupby(['gender_y']).count()

plt.figure(figsize=(12, 8))
sns.barplot(x='gender_y', y='Incident_ID', hue='gender_y', data=df4_final, palette='viridis', dodge=False, legend=False)

plt.title('Incidents vs Gender ', fontsize=16)
plt.xlabel('Gender', fontsize=14)
plt.ylabel('Count of Incidents', fontsize=14)
plt.xticks(rotation=45, fontsize=12) 
plt.yticks(fontsize=12)  

plt.tight_layout()
plt.show()
No description has been provided for this image
In [44]:
df4 = df_final[['Incident_ID', 'Hostages']]
df4_remove_duplicate= df4.drop_duplicates(keep = 'first')
df4_final=df4_remove_duplicate.groupby(['Hostages']).count().head(20)
plt.figure(figsize=(12, 8))
sns.barplot(x='Hostages', y='Incident_ID', hue='Hostages', data=df4_final, palette='viridis', dodge=False, legend=False)
plt.title('Hostages Innvolvement ', fontsize=16)
plt.xlabel('Presence', fontsize=14)
plt.ylabel('Count of Incidents', fontsize=14)
plt.xticks(rotation=45, fontsize=12) 
plt.yticks(fontsize=12)  
plt.tight_layout()
plt.show()
No description has been provided for this image
In [45]:
df_new1=df_final[['State','weapontype']].drop_duplicates(keep = 'first')
plt.figure(figsize=(12,8))
sns.boxplot(x='State', y='weapontype', data=df_new1, hue='State', palette='Set2', legend=False)
plt.title('Weapons used in different states', fontsize=14)
plt.xlabel('States', fontsize=12)
plt.xticks(rotation=90, fontsize=12)
plt.ylabel('Weapon Types', fontsize=12)
plt.show()
No description has been provided for this image
In [46]:
df_new2=df_final['Targets']
df_new2
summary_counts = df_new2.value_counts()
plt.figure(figsize=(8, 8))
plt.pie(summary_counts, labels=summary_counts.index, autopct='%1.1f%%', startangle=140, colors=['#ff9999','#66b3ff','#99ff99'])
plt.title('Targets Distribution')
plt.show()
No description has been provided for this image
In [47]:
df_new3=df_final['Time_Period']
df_new3
summary_counts = df_new3.value_counts()
plt.figure(figsize=(16, 16))
plt.pie(summary_counts, labels=summary_counts.index, autopct='%1.1f%%', textprops={'fontsize': 10}, startangle=140, colors=['#ff9999','#66b3ff','#99ff99'])
plt.title('Shooting Time Peiods')
plt.show()
No description has been provided for this image
In [ ]: